# -*- coding: utf-8 -*-
import math
from scrapy.exceptions import IgnoreRequest
from scrapy.utils.project import get_project_settings
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.http_util import retry_request
from zc_core.dao.catalog_dao import CatalogDao
from zc_core.dao.supplier_dao import SupplierDao
from zc_core.spiders.base import BaseSpider
from chinagd.rules import *


class SkuSpider(BaseSpider):
    """Spider that collects SKU listing pages from the search-bar service.

    Two passes are scheduled by :meth:`start_requests`:

    * per supplier, sliced by the ``PRICE_LADDER`` setting and handled by
      :meth:`parse_sku_list`;
    * per level-3 catalog (MRO), handled by :meth:`parse_sku_mro_list`.

    Both callbacks read the first page, then either fan out the remaining
    pages or, when the page count reaches ``max_page_limit``, re-issue the
    query over narrower price ranges.
    """

    name = "sku"
    sku_url = "https://www.neep.shop/rest/service/routing/nouser/searchBarService"

    # Override settings for this spider
    custom_settings = {
        'DOWNLOAD_DELAY': 2
    }

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialise paging and price-slicing parameters from project settings.

        :param batchNo: batch identifier forwarded to ``BaseSpider``.
        """
        super(SkuSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        self.page_size = 100
        settings = get_project_settings()
        # Price boundaries; each adjacent pair becomes one query range
        self.ladders = settings.get('PRICE_LADDER', [])
        # Number of sub-ranges an over-limit price range is split into
        self.overflow = settings.get('PRICE_OVERFLOW', 5)
        # Page count at which a query is considered over the limit
        # (5000 rows at the current page size)
        self.max_page_limit = math.ceil(5000 / self.page_size)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _price_segments(ladder):
        """Yield ``(start, end)`` bounds for each adjacent pair in ``ladder``."""
        for lower, upper in zip(ladder, ladder[1:]):
            # +1 on the upper bound is deliberate redundancy between segments
            yield lower, upper + 1

    def _split_price_range(self, start_price, end_price):
        """Yield sub-ranges that divide ``start_price..end_price`` into steps.

        The step is clamped to at least 1: a range narrower than
        ``self.overflow`` would otherwise give a zero step, which ``range()``
        rejects with ``ValueError``.
        """
        step = max(1, int((end_price - start_price) / self.overflow))
        ladder = list(range(start_price, end_price, step))
        ladder.append(end_price)
        return self._price_segments(ladder)

    def _page_stats(self, response):
        """Return ``(total_count, total_page)`` read from the JSON response.

        A missing or null ``data`` / ``totalCount`` is treated as zero.
        """
        data_json = json.loads(response.text)
        total_count = (data_json.get('data') or {}).get('totalCount') or 0
        total_page = math.ceil(int(total_count) / self.page_size)
        return total_count, total_page

    def _sku_form_request(self, page, query_params, callback, priority, extra_meta, extra_form):
        """Build the POST form request shared by every listing query.

        :param page: page number to request.
        :param query_params: filter structure, sent as ``str(query_params)``.
        :param callback: response callback.
        :param priority: Scrapy request priority.
        :param extra_meta: query-specific entries appended to the request meta.
        :param extra_form: query-specific entries appended to the form data.
        """
        meta = {
            'reqType': 'sku',
            'page': page,
            'batchNo': self.batch_no,
        }
        meta.update(extra_meta)
        formdata = {
            'pageNo': str(page),
            'pageSize': str(self.page_size),
            # NOTE(review): this sends the Python repr of the structure, not
            # JSON — kept as-is; confirm this is what the service expects.
            'queryParams': str(query_params),
        }
        formdata.update(extra_form)
        return scrapy.FormRequest(
            url=self.sku_url,
            method="POST",
            meta=meta,
            headers={
                'Content-Type': 'application/x-www-form-urlencoded'
            },
            formdata=formdata,
            callback=callback,
            errback=self.error_back,
            priority=priority,
        )

    def _supplier_request(self, supplier_id, page, start_price, end_price, priority, overflow=None):
        """Build a supplier + price-range listing request.

        ``overflow`` is only added to the meta when given; it marks requests
        that already come from a subdivided price range.
        """
        query_params = [{"filterId": "supplier_id_name", "filterName": "供应商", "filterValues": [supplier_id]}]
        extra_meta = {
            'supplierId': supplier_id,
            'startPrice': start_price,
            'endPrice': end_price,
        }
        if overflow is not None:
            extra_meta['overflow'] = overflow
        # Form values must be str/bytes; stringify any other id type
        if isinstance(supplier_id, (str, bytes)):
            form_supplier_id = supplier_id
        else:
            form_supplier_id = str(supplier_id)
        extra_form = {
            'supplierId': form_supplier_id,
            'minSalesPrice': str(start_price),
            'maxSalesPrice': str(end_price),
        }
        return self._sku_form_request(page, query_params, self.parse_sku_list, priority,
                                      extra_meta, extra_form)

    def _catalog_request(self, catalog_id, query_params, page, priority, price_range=None):
        """Build a catalog (MRO) listing request.

        :param price_range: optional ``(start, end)`` tuple; when given, the
            price bounds are added to both the meta and the form data.
        """
        extra_meta = {'catalogId': catalog_id}
        extra_form = {}
        if price_range is not None:
            start_price, end_price = price_range
            extra_meta['startPrice'] = start_price
            extra_meta['endPrice'] = end_price
            extra_form['minSalesPrice'] = str(start_price)
            extra_form['maxSalesPrice'] = str(end_price)
        return self._sku_form_request(page, query_params, self.parse_sku_mro_list, priority,
                                      extra_meta, extra_form)

    # ------------------------------------------------------------------
    # Scrapy entry points
    # ------------------------------------------------------------------
    def start_requests(self):
        """Schedule the first page of every supplier/price and catalog query."""
        # Crawl by supplier
        supplier_list = SupplierDao().get_batch_supplier_list(self.batch_no, fields={'_id': 1, 'name': 1})
        if supplier_list:
            for supplier in supplier_list:
                sp_id = supplier.get('_id')
                # Office category only: suppliers with an id above 3 are skipped
                if int(sp_id) > 3:
                    continue

                # Slice each supplier by the configured price ladder
                for start_price, end_price in self._price_segments(self.ladders):
                    yield self._supplier_request(sp_id, 1, start_price, end_price, priority=60)
        else:
            self.logger.error('无供应商: %s', self.batch_no)

        # Collect MRO by catalog
        cat3_list = CatalogDao().get_batch_cat_list(self.batch_no, fields={'_id': 1, 'catalogName': 1},
                                                    query={"level": 3})
        if cat3_list:
            for cat3 in cat3_list:
                cat3_id = cat3.get('_id')
                query_params = fill_sku_params({"filterValues": [cat3_id]})
                yield self._catalog_request(cat3_id, query_params, 1, priority=50)
        else:
            self.logger.error('无分类: %s', self.batch_no)

    def parse_sku_list(self, response):
        """Handle a supplier listing page.

        Yields the parsed SKUs as a ``Box``. For a full first page it also
        yields follow-up requests: the remaining pages, or — when the page
        count reaches ``max_page_limit`` — the same query split into narrower
        price ranges. A range that is still over the limit after one split is
        logged as an error and not split again.
        """
        if not (response and response.text):
            return
        meta = response.meta

        # Resolves to the module-level parse_sku_list helper, not this method.
        # A None result is treated as an empty page.
        sku_list = parse_sku_list(response) or []
        if sku_list:
            yield Box("sku", self.batch_no, sku_list)

        page = meta.get("page")
        supplier_id = meta.get("supplierId")
        start_price = meta.get("startPrice")
        end_price = meta.get("endPrice")
        overflow = meta.get("overflow")

        total_count, total_page = self._page_stats(response)
        self.logger.info(
            '清单：spId=%s, price=<%s~%s>, page=%s, cnt=%s',
            supplier_id, start_price, end_price, page, total_count)

        # Only a full first page triggers follow-up requests
        if page != 1 or len(sku_list) < self.page_size:
            return

        if total_page < self.max_page_limit:
            # More pages: request them concurrently
            for idx in range(2, total_page + 1):
                yield self._supplier_request(supplier_id, idx, start_price, end_price, priority=80)
            return

        if overflow:
            # Already a subdivided range and still over the limit
            self.logger.error(
                '超限分段超限: spId=%s,price=<%s~%s>, total=%s',
                supplier_id, start_price, end_price, total_count)
            return

        self.logger.info(
            '页数超限: spId=%s,price=<%s~%s>, total=%s',
            supplier_id, start_price, end_price, total_count)
        # Subdivide the price range
        for st_price, ed_price in self._split_price_range(start_price, end_price):
            yield self._supplier_request(supplier_id, page, st_price, ed_price,
                                         priority=60, overflow=self.overflow)

    def parse_sku_mro_list(self, response):
        """Handle a catalog (MRO) listing page.

        Yields the parsed SKUs as a ``Box``. For a full first page it also
        yields follow-up requests: the remaining pages, or — when the page
        count reaches ``max_page_limit`` and no price range was applied yet —
        the same catalog query once per price-ladder segment.
        """
        if not (response and response.text):
            return
        meta = response.meta

        # Resolves to the module-level parse_sku_list helper.
        # A None result is treated as an empty page.
        sku_list = parse_sku_list(response) or []
        if sku_list:
            yield Box("sku", self.batch_no, sku_list)

        page = meta.get("page")
        catalog_id = meta.get("catalogId")
        start_price = meta.get("startPrice")
        end_price = meta.get("endPrice")

        total_count, total_page = self._page_stats(response)
        self.logger.info(
            '清单：cat=%s, price=<%s~%s>, page=%s, cnt=%s',
            catalog_id, start_price, end_price, page, total_count)

        # Only a full first page triggers follow-up requests
        if page != 1 or len(sku_list) < self.page_size:
            return

        query_params = fill_sku_params({"filterValues": [catalog_id]})
        # Truthy when this response came from a price-ladder query
        has_price_range = bool(start_price or end_price)

        if total_page < self.max_page_limit:
            # More pages: request them concurrently, keeping the price range if any
            price_range = (start_price, end_price) if has_price_range else None
            for idx in range(2, total_page + 1):
                yield self._catalog_request(catalog_id, query_params, idx,
                                            priority=80, price_range=price_range)
            return

        if has_price_range:
            # Still over the limit inside a price segment: only logged,
            # no further subdivision is attempted here.
            self.logger.info('页数超限: cat=%s, total=%s', catalog_id, total_count)
            return

        # Re-issue the catalog query once per price-ladder segment
        for segment in self._price_segments(self.ladders):
            yield self._catalog_request(catalog_id, query_params, page,
                                        priority=60, price_range=segment)


